	
cap program drop bunchest
program define bunchest, rclass
	args ub threshold
	
	*********************************************************************************
	*	Step 4: Collapse Data into counts of firms by revenue bins					*
	*********************************************************************************

					
		collapse (count) score , by(scoregroups)   //score is raw score; scoregroups = rounded scores
				
			*** Score polynomial terms 
			
			replace scoregroups = 0.1 if scoregroups == 0 

			gen  overallscore = scoregroups
			gen  overallscore2 = overallscore^2
			gen  overallscore3 = overallscore^3
			gen  overallscore4 = overallscore^4
//			gen  overallscore5 = overallscore^5
//			gen  overallscore6 = overallscore^6
						
		
	*** Estimate Bunching: 
	
	local ub = 92
	local threshold = 90
	mat results = (`ub',0,0,0,0,0,0,0,0)
	local lb1 = `threshold'-0.2

		forval i = `=`threshold'-.2' (-0.2) 64 {
		
			*di "Testing lower bound: `i'"
			
			cap drop exclregn exclregndummy 
			
			qui gen exclregn = overallscore if inrange(scoregroups,`i',`ub') // define dummies for bunching region
				qui replace exclregn = 0 if exclregn ==. 
			
			qui egen exclregndummy = group(exclregn)
			qui tab exclregndummy, matrow(regndummynames)
			local maxcats = rowsof(regndummynames) // save dummy name of the last score group  maxcats = 23
			qui levelsof exclregndummy if exclregn == `threshold', local(notch) //save dummy name of the notch group
			
			*Estimate counterfactual: 
						
			qui reg score i.exclregndummy overallscore overallscore2 overallscore3 overallscore4 
			
			* Identify starting and ending columns of results matrix with the coefficient dummies for excluded region
			
			mat A = e(b) // save results matrix
			
			local startcol =  colnumb(A,"2.exclregndummy")
			local notchcol = colnumb(A,"`notch'.exclregndummy")
			local afternotch = `notchcol' + 1
			local endcol =  colnumb(A,"`maxcats'.exclregndummy")   
			

			* Add up the missing mass:    
			
			mkmat score if inrange(overallscore,`i',`=`threshold'-.1')
				mat M_actual = score   // M_actual is 1x1
			
			mkmat overallscore if inrange(overallscore,`i',`=`threshold'-.1')
				mat Om = overallscore   //Om is 1x1
				

				
			local Mlen = rowsof(Om)   //missing mass length of matrix
			mat C = J(1,`Mlen',1)  // C is 1x1
			
			mata : st_matrix("Om2", st_matrix("Om"):^2)
			mata : st_matrix("Om3", st_matrix("Om"):^3)
			mata : st_matrix("Om4", st_matrix("Om"):^4)
//			mata : st_matrix("Om5", st_matrix("Om"):^5)
//			mata : st_matrix("Om6", st_matrix("Om"):^6)

			mat M_est = _b[_cons]*C' + _b[overallscore]*Om + _b[overallscore2]*Om2 + _b[overallscore3]*Om3 + _b[overallscore4]*Om4
			//+ _b[overallscore5]*Om5 + _b[overallscore6]*Om6
			
				
			mat M_missing = M_est - M_actual // mass in excess of counterfactual density: dim (threshold-lb)*5 x 1
			
			mat D = M_missing'*C'  // 1x1
			
			local missingmass = D[1,1]
			 
			* Add up the excess mass:     
			
			mkmat score if inrange(overallscore,`=`threshold'-.1',`ub')
				mat B_actual = score    // B_actual is dimension (ub-threshold)*5 x 1
			
			mkmat overallscore if inrange(overallscore,`=`threshold'-.1',`ub')
				mat Ob = overallscore   // Ob is 21x1
						
			local Blen = rowsof(Ob)   //missing mass length of matrix
			mat C = J(1,`Blen',1)    
				
			mata : st_matrix("Ob2", st_matrix("Ob"):^2)
			mata : st_matrix("Ob3", st_matrix("Ob"):^3)
			mata : st_matrix("Ob4", st_matrix("Ob"):^4)
//			mata : st_matrix("Ob5", st_matrix("Ob"):^5)
//			mata : st_matrix("Ob6", st_matrix("Ob"):^6)

			mat B_est = _b[_cons]*C' + _b[overallscore]*Ob + _b[overallscore2]*Ob2 + _b[overallscore3]*Ob3 + _b[overallscore4]*Ob4 
			//+ _b[overallscore5]*Ob5 + _b[overallscore6]*Ob6  // 21x1
			
			
			mat B_excess = B_actual - B_est  // mass in excess of counterfactual density
			
			mat D = B_excess'*C'   // 1x1
			
			local excessmass = D[1,1]
			
			* Difference between missing and excess mass: 
			
			local diff = `excessmass' - `missingmass'
			
			di "Difference between excess and missing mass is: `diff' for upper bound `i'"
			
				
			local cflb = _b[_cons] + `i'*_b[overallscore]+ (`i'^2)*_b[overallscore2] + (`i'^3)* _b[overallscore3] + (`i'^4)* _b[overallscore4] 
			//+ (`i'^5)* _b[overallscore5] + (`i'^6)* _b[overallscore6]
				
			local cfnotch = _b[_cons] + `threshold'*_b[overallscore]+ (`threshold'^2)*_b[overallscore2] + (`threshold'^3)* _b[overallscore3] + (`threshold'^4)* _b[overallscore4]
			//+ (`threshold'^5)* _b[overallscore5] + (`threshold'^6)* _b[overallscore6] 
				
			local cfub = _b[_cons] + `ub'*_b[overallscore]+ (`ub'^2)*_b[overallscore2] + (`ub'^3)* _b[overallscore3] + (`ub'^4)* _b[overallscore4]
			//+ (`ub'^5)* _b[overallscore5] + (`ub'^6)* _b[overallscore6]
			
			

			* Estimated average bunching: 
			
			local b_av = `excessmass'/(0.5*(`cflb'+`cfub'))
			
			mat resultsappend = (`ub',`i',`excessmass',`missingmass',`diff',`cflb',`cfub',`cfnotch',`b_av')
			mat results = results \ resultsappend
*			matrix list results
			
		}
			

	
		mat colnames results = ub lb excess missing diff cflb cfub cfnotch b_av
*		matrix list A
		
		clear 
		svmat results, names(col)
		
		* Keep the bunching estimate for the lowest difference between excess and missing mass: 
		
		drop if lb == 0 
		gen absdiff = abs(diff)
		sort absdiff
		
		keep if _n == 1
		mkmat _all, matrix(R)
		
//		matrix list A		
		
*		display R[1,2]
*		display R[1,3]
*		display R[1,8]		
		
		return scalar lb_est = R[1,2]
		return scalar excess_est = R[1,3]
		return scalar b_av_est = R[1,9]  
		return scalar cflb_est = R[1,6]
		return scalar cfub_est = R[1,7]
		return scalar cfnotch_est = R[1,8]  


end
